package com.atguigu.url.util;

import com.atguigu.url.constant.RegModConst;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Component;

import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regular-expression helpers for pulling matching substrings (such as URLs) out of text.
 *
 * @version V1.0
 * @author: guangcheng
 * @date: 2021/5/22 20:18
 */
@Component
public class RegUtils {

    /**
     * Builds a {@link Matcher} for the given regular expression over the given content.
     *
     * @param regStr  regular expression to compile
     * @param content text to scan
     * @return a matcher over {@code content}, or {@code null} when {@code content} is
     *         {@code null} or empty
     * @throws java.util.regex.PatternSyntaxException if {@code regStr} is not a valid
     *         expression (only evaluated when {@code content} is non-empty)
     */
    public Matcher getMatcher(String regStr, String content) {
        // Nothing to scan: return early instead of compiling a pattern that is never used.
        if (StringUtils.isEmpty(content)) {
            return null;
        }
        return Pattern.compile(regStr).matcher(content);
    }

    /**
     * Collects every distinct substring of {@code content} that matches the default
     * pattern {@code RegModConst.regStrHttp}.
     *
     * @param content text to scan; may be {@code null} or empty
     * @return a mutable set of the distinct matches; empty when {@code content} is
     *         {@code null}, empty, or contains no match
     */
    public Set<String> findUrlSetFromTextOrStr(String content) {
        // Same scan as the custom-pattern overload, just with the default expression.
        return findUrlSetFromTextOrStr(content, RegModConst.regStrHttp);
    }

    /**
     * Collects every distinct substring of {@code content} that matches a caller-supplied
     * regular expression.
     *
     * @param content text to scan; may be {@code null} or empty
     * @param regMod  regular expression whose full matches are collected
     * @return a mutable set of the distinct matches; empty when {@code content} is
     *         {@code null}, empty, or contains no match
     * @throws java.util.regex.PatternSyntaxException if {@code regMod} is not a valid
     *         expression and {@code content} is non-empty
     */
    public Set<String> findUrlSetFromTextOrStr(String content, String regMod) {
        Set<String> urlSet = new HashSet<>();

        // getMatcher signals "no content" with null, so check its result directly rather
        // than repeating the emptiness test here.
        Matcher matcher = getMatcher(regMod, content);
        if (matcher == null) {
            return urlSet;
        }

        while (matcher.find()) {
            // group() is the whole match (equivalent to group(0)).
            urlSet.add(matcher.group());
        }
        return urlSet;
    }

}

